import PyPDF2
from docx import Document



def pdf_to_word(pdf_path, word_path) -> None:
    """Extract the text of every page of a PDF and save it as a Word document.

    Each page that yields text becomes one paragraph in the output document;
    pages for which no text can be extracted are skipped. Only the plain text
    returned by ``extract_text`` is carried over.

    Args:
        pdf_path: Path of the PDF file to read.
        word_path: Path the Word (.docx) file is written to.
    """
    # Characters that XML 1.0 forbids: C0 controls other than tab/LF/CR, plus
    # the non-characters U+FFFE and U+FFFF. PDF text extraction can emit them
    # (NUL, form feed, ...), and the XML layer underneath python-docx rejects
    # such strings with ValueError, which would abort the whole conversion.
    illegal_xml_chars = dict.fromkeys(
        [c for c in range(0x20) if c not in (0x09, 0x0A, 0x0D)]
        + [0xFFFE, 0xFFFF]
    )

    # Create the Word document that receives the extracted text.
    doc = Document()

    # Open the PDF in binary mode; the reader needs the file to stay open
    # while pages are being read.
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)

        # Walk the pages in order, one paragraph per page.
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                continue
            # Mapping a code point to None makes str.translate delete it.
            text = text.translate(illegal_xml_chars)
            if text:
                doc.add_paragraph(text)

    # Save the Word document.
    doc.save(word_path)
    print(f"Converted '{pdf_path}' to '{word_path}'")


# Run the conversion.
# Raw string literals keep the Windows backslashes literal: in a normal
# string, sequences such as "\海" are invalid escapes, which Python warns
# about (SyntaxWarning since 3.12). The resulting path values are unchanged.
pdf_file = r'F:\海思太科\医疗保障信息平台数据归集交换库设计说明书-V3.3.pdf'
word_file = r'F:\海思太科\医疗保障信息平台数据归集交换库设计说明书-V3.3111.docx'
pdf_to_word(pdf_file, word_file)